\documentclass{article}

\usepackage{amssymb}
\usepackage{amsmath}
\usepackage{appendix}
\usepackage{float}
\usepackage{enumitem}
\usepackage{syntax}
\usepackage{hyperref}
\usepackage{listings}
\usepackage{microtype}
\usepackage[a4paper]{geometry}

% Restyle the figure float with the float package's `boxed' style, so that
% every figure in this document is drawn inside a frame.
\floatstyle{boxed}
\restylefloat{figure}

% Indentation length used by the syntax package's grammar environments.
\setlength{\grammarindent}{7em}
%% Define a `field' separator for records: it breaks the line and then
%% typesets a comma (followed by a quad of space) hanging to the left of the
%% start of the new line.
\def\fld{\\\llap{,\quad}}%

% `record' list environment (enumitem): a description-style list, nestable up
% to three levels, with no extra vertical space between items and labels
% introduced by a minus sign before switching to bold.
% NOTE(review): this environment is not used in the visible document body --
% confirm whether it is still needed.
\newlist{record}{description}{3}
\setlist[record]{itemsep=0pt, format=$-$\bfseries}

% Notation shorthands (math mode): the arrow used for maps / partial
% functions, and the blackboard-bold N.
\newcommand*{\rhu}{\rightharpoonup}
\newcommand*{\N}{\mathbb{N}}

% Type constructors used by the notation.  All of them are used in math mode.
% Lists of #1.
\newcommand{\lists}[1]{\left[{#1}\right]}
% Non-empty lists of #1.
\newcommand{\nelists}[1]{\left[{#1}\right]^+}
% Sets of #1.
\newcommand{\sets}[1]{\{{#1}\}}
% Optional value: either the unit `()' or a value of type #1.
% The whole expression is kept in math mode so that $\maybe{A}$ does not
% leave and re-enter math half-way through, and \mid gives the bar proper
% spacing on both sides.  \ensuremath keeps the macro usable from text mode.
\newcommand{\maybe}[1]{\ensuremath{()\mid{#1}}}

% Identifier sets.  \idsof{X} is typeset as a calligraphic I carrying X as
% its subscript (math mode only).  The subscript is braced so that an
% argument consisting of several tokens is subscripted as a whole rather
% than only its first token.
\newcommand{\idsof}[1]{\mathcal{I}\!_{#1}}
% Identifier sets for transactions, blocks, addresses (agents), epochs,
% slots, updates and stakeholders.
\newcommand{\txids}{\idsof{\txs}}
\newcommand{\blockids}{\idsof{\blocks}}
\newcommand{\agentids}{\idsof{\agents}}
\newcommand{\epochids}{\idsof{\mathcal{E}}}
\newcommand{\slotids}{\idsof{\mathcal{S}}}
\newcommand{\updids}{\idsof{\mathcal{UP}}}
\newcommand{\stakeholderids}{\idsof{\mathcal{K}}}
% Public keys and signatures.
\newcommand{\pubkey}{\chi_{\textbf{pub}}}
\newcommand{\signature}{\chi_{\textbf{sig}}}

% Symbols for the core datatypes (math mode only).  Each family is a
% calligraphic base symbol; members of the family carry a bold subscript.

% VSS: public keys, secret proof, secret, encrypted and decrypted shares.
\newcommand*{\vsspubkey}{\mathcal{VSS}_{\pubkey}}
\newcommand*{\vssproof}{\mathcal{VSS}_{\textbf{proof}}}
\newcommand*{\vsssec}{\mathcal{VSS}_{\textbf{sec}}}
\newcommand*{\vssencshare}{\mathcal{VSS}_{\textbf{enc}}}
\newcommand*{\vssdecshare}{\mathcal{VSS}_{\textbf{dec}}}

% Addresses.
\newcommand*{\agents}{\mathcal{A}}

% Transactions: inputs, fee policy and proof.
\newcommand*{\txs}{\mathcal{T}}
\newcommand*{\txins}{\txs_{\textbf{in}}}
\newcommand*{\txfeepol}{\txs_{\textbf{feepol}}}
\newcommand*{\txpf}{\txs_{\textbf{proof}}}

% Transaction witnesses: public-key, script and redeem variants.
\newcommand*{\twit}{\mathcal{TW}}
\newcommand*{\twitpk}{\twit_{\textbf{pk}}}
\newcommand*{\twitscript}{\twit_{\textbf{script}}}
\newcommand*{\twitredeem}{\twit_{\textbf{redeem}}}

% Shared seed computation: commitments, openings, shares, certificates, proof.
\newcommand*{\ssc}{\mathcal{SSC}}
\newcommand*{\ssccomm}{\ssc_{\textbf{comm}}}
\newcommand*{\sscopen}{\ssc_{\textbf{open}}}
\newcommand*{\sscshare}{\ssc_{\textbf{share}}}
\newcommand*{\ssccert}{\ssc_{\textbf{cert}}}
\newcommand*{\sscpf}{\ssc_{\textbf{proof}}}

% Delegation: heavyweight and light (superscript l) certificates and the
% corresponding proxy signatures.
\newcommand*{\dlg}{\mathcal{DLG}}
\newcommand*{\dlglight}{\dlg^{\textbf{l}}}
\newcommand*{\proxysig}{\dlg_{\textbf{sig}}}
\newcommand*{\proxysiglight}{\dlglight_{\textbf{sig}}}

% Updates: proposals, update data and votes.
\newcommand*{\upd}{\mathcal{UP}}
\newcommand*{\updprop}{\upd_{\textbf{prop}}}
\newcommand*{\upddata}{\upd_{\textbf{data}}}
\newcommand*{\updvote}{\upd_{\textbf{vote}}}

% Blocks: version, version modifier, body, header and header components,
% signature and proof.  \bheadpf and \blockpf denote the same symbol.
\newcommand*{\blocks}{\mathcal{B}}
\newcommand*{\blockver}{\blocks_{\textbf{ver}}}
\newcommand*{\blockvermod}{\blocks_{\textbf{ver}\Delta}}
\newcommand*{\bbodies}{\blocks_{\textbf{body}}}
\newcommand*{\bheads}{\blocks_{\textbf{head}}}
\newcommand*{\bheadpf}{\blockpf}
\newcommand*{\bheadcons}{\blocks_{\textbf{cons}}}
\newcommand*{\bheadex}{\blocks_{\textbf{headex}}}
\newcommand*{\blocksig}{\blocks_{\textbf{sig}}}
\newcommand*{\blockpf}{\blocks_{\textbf{proof}}}

% Epoch boundary blocks: the block symbol with a prime.
\newcommand*{\ebb}{\mathcal{B}^\prime}
\newcommand*{\ebbhead}{\ebb_{\textbf{head}}}
\newcommand*{\ebbheadcons}{\ebb_{\textbf{cons}}}

% Primitive types.  \hstype sets a type name in bold; \Word{n} appends the
% bit width n to the name `Word' (always brace the width: \Word{64}).
\newcommand{\hstype}[1]{\textbf{#1}}
\newcommand*{\String}{\hstype{String}}
\newcommand{\Word}[1]{\hstype{Word#1}}
\newcommand*{\hash}{\hstype{Hash}}
% Integers and booleans are written in blackboard bold (math mode only).
\newcommand*{\Integer}{\mathbb{I}}
\newcommand*{\Bool}{\mathbb{B}}
\newcommand*{\Bytes}{\hstype{Bytes}}


\title{Cardano on-the-wire specification}
\author{Nicholas Clarke}
\begin{document}
\maketitle
\tableofcontents

\section{Introduction}

This document describes the binary serialisation formats used for the Cardano
blockchain.

It proceeds as follows: Section~\ref{sec:types} describes the core types which
must be serialised for the purposes of Cardano communication. We then detail the
requirements on the binary format in Section~\ref{sec:reqs}. In
Section~\ref{sec:binfmt} we describe the explicit binary serialisation of the
core types satisfying these requirements.

Appendix~\ref{sec:currentfmt} details the current binary format.

\subsection{Notation}

\begin{description}
\item[Lists] Lists of type $A$ are written as $\lists{A}$.
\item[Non-empty lists] Non-empty lists of type $A$ are written as $\nelists{A}$.
\item[Sets] Sets of type $A$ are written as $\sets{A}$.
\item[Maps] A map from items of type $A$ to items of type $B$ is written as
  $A\rhu B$.\footnote{We use this terminology for consistency with
    \cite{spiwack}, where these are partial functions.}
\item[Records] We denote records as:
  \begin{grammar}
    <record> = \{
    \fld fieldName :: type
    \fld fieldName2 :: type2
    \}
  \end{grammar}
\item[Variants] We denote variants as:
  \begin{grammar}
    <variant> = Option1 \alt Option2 \alt Option3.
  \end{grammar}
\item[Optional values] We denote an optional value of type $A$ as $\maybe{A}$.
\end{description}

We will use capitalised words for variant constructors and lower-case
terms for field names, as per standard Haskell syntax.

\subsection{Relation to the abstract specification and to the Cardano implementation}

This document straddles the middle-ground between the rule-based specification
given in \cite{spiwack} and the concrete implementation at \cite{cardano}. As
such, it tries to compromise between the differing presentations of how the
blockchain is constructed.

In general, we take the following approach:

\begin{itemize}
\item Where possible, we take our nomenclature from \cite{spiwack}. So, for
  example, we refer to the type of transactions as $\txs$ rather than
  $\textbf{Tx}$. Where there is no name given in~\cite{spiwack} we generally
  invent a new identifier in a similar style.
\item Structure, on the other hand, typically derives from the code in
  \cite{cardano}, with some exceptions:
  \begin{itemize}
  \item We drop \verb|newtype| wrappers.
  \item We flatten nested records where doing so would not lead to significant
    confusion (for example, by requiring convoluted or lengthy names to
    contextualise fields).
  \item In general, we tend to be fairly free with translating between
    isomorphic representations where it increases the readability of this spec.
  \item We monomorphize any polymorphic variables to the type they are
    instantiated to when they are serialised.
  \item We ignore \verb|AsBinary| and its ilk. These provide `typed' wrappers
    around \verb|ByteString| in order to defer encoding/decoding. They only
    confuse an abstract definition.
  \end{itemize}
\end{itemize}

\section{Core datatypes}
\label{sec:types}

The types in this section are derived from a combination of the blockchain spec
and Haskell datatypes in the codebase.

\subsection{Primitives}

Primitive types:

\begin{description}
\item [$\Bool$] Boolean value
\item [$\Word{8}$] 8-bit word
\item [$\Word{16}$] 16-bit word
\item [$\Word{32}$] 32-bit word
\item [$\Word{64}$] 64-bit word
\item [$\String$] Arbitrary UTF8-encoded string type. We do not draw any
  distinctions as to whether this is implemented as a $\hstype{String}$ or
  $\hstype{Text}$.
\item [$\Bytes$] Arbitrary string of bytes.
\item [$\Integer$] Arbitrary-precision integers.
\end{description}

\subsection{Basic Cardano Types}

We start with sets of identifiers. These are represented in code as Blake2b-256
hashes. We also sometimes deal with these hashes directly.

\begin{itemize}
\item{Transaction identifiers $\txids$}
\item{Block identifiers $\blockids$}
\item Update identifiers $\updids$
\item Concrete hashes $\hash$
\end{itemize}

We also have addresses, which are represented as Blake2b-224 hashes.

\begin{itemize}
\item{Address (agent) identifiers $\agentids$}
\item Stakeholder identifiers $\stakeholderids$
\end{itemize}

Epochs are identified by their index as a $\Word{64}$. Slots are identified by a
pair of an epoch identifier and a $\Word{16}$ index within the epoch.

\begin{itemize}
\item Epoch indices $\epochids$
\item Slot indices $\slotids$
\end{itemize}

Cardano public keys are elliptic curve DH keys on curve 25519.

\begin{itemize}
\item Public keys $\pubkey$
\item Signatures $\signature$
\end{itemize}

\subsection{Addresses}

We have a set of addresses $\agents$. An address has an identifier, a type, and
some additional data held in an \verb|attributes| field. In particular, each
address contains a stake distribution $\agents_{\textbf{distr}}$.

\begin{figure}[H]
  \begin{grammar}
    <$\agents_{\textbf{distr}}$> = Bootstrap
      \alt SingleKey $\agentids$
      \alt MultiKey $\agentids \rhu \Word{64}$.

    <$\agents_{\textbf{type}}$> = PubKey \alt Script \alt Redeem \alt Unknown $\Word{8}$.

    <$\agents_{\textbf{attr}}$> = \{
    \fld pkDerivationPath :: $\maybe{\Bytes}$
    \fld stakeDistr :: $\agents_{\textbf{distr}}$
    \fld unparsed :: $\Word{8}\rhu\Bytes$
    \}.

    <$\agents$> = \{
    \fld root :: $\agentids$
    \fld attributes :: $\agents_{\textbf{attr}}$
    \fld type :: $\agents_{\textbf{type}}$
    \}.
  \end{grammar}

  \caption{Address Types}
\end{figure}

\subsection{Transactions}

We have a set of transactions $\txs$.

\begin{figure}[H]
  \caption{Transactions}
  \label{fig:txin}
  \begin{grammar}
    <$\txins$> = Valid $\txids\times\Word{32}$
    \alt Invalid $\Word{8}\times\Bytes$.

    <$\txs$> = \{
    \fld inputs :: $\lists{\txins}$
    \fld outputs :: $\nelists{\agentids \times \Word{64}}$
    \}

    <$\txpf$> = \{
    \fld txCount :: $\Word{32}$
    \fld merkleRoot :: $\hash$
    \fld witnessHash :: $\hash$
    \}
  \end{grammar}
\end{figure}

We also have transaction witnesses, which provide proof that the transaction has
authority to spend its inputs.

\begin{figure}[H]
  \caption{Transaction witnesses}
  \begin{grammar}
    <$\twitpk$> = \{
    \fld key :: $\pubkey$
    \fld signature :: $\signature$
    \}

    <$\twitscript$> = \{
    \fld validator :: $\Word{16}\times\Bytes$
    \fld redeemer :: $\Word{16}\times\Bytes$
    \}

    <$\twitredeem$> = \{
    \fld key :: $\pubkey$
    \fld signature :: $\signature$
    \}

    <$\twit$> = PublicKey $\twitpk$
    \alt Script $\twitscript$
    \alt Redeem $\twitredeem$
    \alt Unknown $\Word{8}\times\Bytes$
  \end{grammar}
\end{figure}

\subsection{Shared Seed Computation}

Shared seed computation deals with commitments, openings, shares and
certificates.

We start with some basic types used for VSS. VSS uses its own public key
cryptography scheme.

\begin{itemize}
\item VSS public keys $\vsspubkey$
\item VSS secret $\vsssec$
\item VSS encrypted and decrypted shares $\vssencshare$ and $\vssdecshare$
\item VSS secret proof $\vssproof$
\end{itemize}

\begin{figure}[H]
 \caption{Shared seed computation}
 \begin{grammar}
   <$\ssccomm$> = $\pubkey \times ((\vsspubkey \rhu \vssencshare) \times \vssproof)
                           \times \signature$

   <$\sscopen$> = $\stakeholderids \rhu \vsssec$

   <$\sscshare$> = $\stakeholderids \rhu (\stakeholderids \rhu \nelists{\vssdecshare})$

   <$\ssccert$> = \{
   \fld vsskey :: $\vsspubkey$
   \fld signingKey :: $\pubkey$
   \fld expiry :: $\epochids$
   \fld signature :: $\signature$
   \}

   <$\ssc$> = Commitments $\lists{\ssccomm} \times \lists{\ssccert}$
   \alt Openings $\sscopen \times \lists{\ssccert}$
   \alt Shares $\sscshare \times \lists{\ssccert}$
   \alt Certificates $\lists{\ssccert}$

   <$\sscpf$> = CommitmentsProof $\hash \times \hash$
   \alt OpeningsProof $\hash \times \hash$
   \alt SharesProof $\hash \times \hash$
   \alt CertificatesProof $\hash$
 \end{grammar}
\end{figure}

\subsection{Delegation}

The heavyweight delegation system is used to allow stakeholders to authorise
other parties to issue blocks on their behalf.

\begin{figure}[H]
  \begin{grammar}
    <$\dlg$> = \{
    \fld epoch :: $\epochids$
    \fld issuer :: $\pubkey$
    \fld delegate :: $\pubkey$
    \fld certificate :: $\signature$
    \}

    <$\proxysig$> = \{
    \fld psk :: $\dlg$
    \fld sig :: $\signature$
    \}

    <$\dlglight$> = \{
    \fld epoch :: $\epochids\times\epochids$
    \fld issuer :: $\pubkey$
    \fld delegate :: $\pubkey$
    \fld certificate :: $\signature$
    \}

    <$\proxysiglight$> = \{
    \fld psk :: $\dlglight$
    \fld sig :: $\signature$
    \}
  \end{grammar}
  \caption{Delegation}
  \label{fig:dlgtypes}
\end{figure}

\subsection{Updates}

This section covers the types used to orchestrate updates of the system. We also
introduce the block version and transaction fee policy in this section, since
they are only serialised as part of the update system.

\begin{figure}[H]
  \begin{grammar}
    <$\blockver$> = $\Word{16}\times\Word{16}\times\Word{8}$

    <$\txfeepol$> = Linear $\Integer\times\Integer$
    \alt Unknown $\Word{8}\times\Bytes$

    <$\blockvermod$> = \{
    \fld scriptVersion     :: $\maybe{\Word{16}}$
    \fld slotDuration      :: $\maybe{\Integer}$
    \fld maxBlockSize      :: $\maybe{\Integer}$
    \fld maxHeaderSize     :: $\maybe{\Integer}$
    \fld maxTxSize         :: $\maybe{\Integer}$
    \fld maxProposalSize   :: $\maybe{\Integer}$
    \fld mpcThd            :: $\maybe{\Word{64}}$
    \fld heavyDelThd       :: $\maybe{\Word{64}}$
    \fld updateVoteThd     :: $\maybe{\Word{64}}$
    \fld updateProposalThd :: $\maybe{\Word{64}}$
    \fld updateImplicit    :: $\maybe{\Word{64}}$
    \fld softforkRule      :: $\maybe{(\Word{64}\times\Word{64}\times\Word{64})}$
    \fld txFeePolicy       :: $\maybe{\txfeepol}$
    \fld unlockStakeEpoch  :: $\maybe{\epochids}$
    \}

    <$\upddata$> = \{
    \fld appDiffHash :: $\hash$
    \fld pkgHash :: $\hash$
    \fld updaterHash :: $\hash$
    \fld mdHash :: $\hash$
    \}

    <$\updprop$> = \{
    \fld blockVersion :: $\blockver$
    \fld blockVersionMod :: $\blockvermod$
    \fld softwareVersion :: $\String \times \Word{32}$
    \fld data :: $\String \rhu \upddata$
    \fld attributes :: $\Word{8} \rhu \Bytes$
    \fld from :: $\pubkey$
    \fld signature :: $\signature$
    \}

    <$\updvote$> = \{
    \fld voter :: $\pubkey$
    \fld proposalId :: $\updids$
    \fld vote :: $\Bool$
    \fld signature :: $\signature$
    \}

    <$\upd$> = \{
    \fld proposal :: $\maybe{\updprop}$
    \fld votes :: $\lists{\updvote}$
    \}
  \end{grammar}
  \caption{Updates}
  \label{fig:updtypes}
\end{figure}
\subsection{Blocks}

A block consists of a block header and a block body. The block header consists
of verification for the various components in the block body.

\begin{figure}[H]
  \caption{Blocks}
  \label{fig:block}
  \begin{grammar}
    <$\blocksig$> = Signature $\signature$
    \alt ProxySigLight $\proxysiglight$
    \alt ProxySigHeavy $\proxysig$

    <$\bheadcons$> = \{
    \fld slotId :: $\slotids$
    \fld leaderKey :: $\pubkey$
    \fld difficulty :: $\Word{64}$
    \fld signature :: $\blocksig$
    \}

    <$\bheadex$> = \{
    \fld blockVersion :: $\blockver$
    \fld softwareVersion :: $\String \times \Word{32}$
    \fld extraProof :: $\hash$
    \fld attributes :: $\String \rhu \emptyset$
    \}

    <$\blockpf$> = \{
    \fld txProof :: $\txpf$
    \fld sscProof :: $\sscpf$
    \fld dlgProof :: $\hash$
    \fld updProof :: $\hash$
    \}

    <$\bheads$> = \{
    \fld prevBlock :: $\blockids$
    \fld bodyProof :: $\blockpf$
    \fld consensusData :: $\bheadcons$
    \fld extraData :: $\bheadex$
    \}

    <$\bbodies$> = \{
    \fld txPayload :: $\lists{\txs\times\lists{\twit}}$
    \fld sscPayload :: $\ssc$
    \fld dlgPayload :: $\lists{\dlg}$
    \fld updPayload :: $\upd$
    \}

    <$\blocks$> = \{
    \fld header :: $\bheads$
    \fld body :: $\bbodies$
    \fld extra :: $\Word{8} \rhu \Bytes$
    \}

  \end{grammar}
\end{figure}

\subsubsection{Epoch Boundary Blocks}

In addition to regular blocks, epoch boundary blocks contain a list of slot
leaders for a given epoch. They are not conventionally distributed as part of
the blockchain, but can be requested as part of catch-up and as such form part
of the on-the-wire protocol.

\begin{figure}[H]
  \begin{grammar}
    <$\ebbheadcons$> = \{
    \fld epoch :: $\epochids$
    \fld chainDifficulty :: $\Word{64}$
    \}

    <$\ebbhead$> = \{
    \fld prevBlock :: $\blockids$
    \fld bodyProof :: $\hash$
    \fld consensusData :: $\ebbheadcons$
    \fld extraData :: $\Word{8} \rhu \Bytes$
    \}

    <$\ebb$> = \{
    \fld header :: $\ebbhead$
    \fld body :: $\nelists{\stakeholderids}$
    \fld extra :: $\Word{8} \rhu \Bytes$
    \}
  \end{grammar}
  \caption{Epoch Boundary Blocks}
  \label{fig:ebb}
\end{figure}

\section{Requirements on the binary format}
\label{sec:reqs}

\subsection{Cryptographic properties}

\subsection{Dependencies on the binary format}

\section{Binary specification}
\label{sec:binfmt}

\begin{thebibliography}{9}
\bibitem{spiwack}
  \textit{Rule-based specification of the blockchain logic}.
  Erik de Castro Lopo, Nicholas Clarke \& Arnaud Spiwack.

\bibitem{cardano}
  \textit{Cryptographic currency implementing Ouroboros PoS protocol}.
  \url{https://github.com/input-output-hk/cardano-sl/}

\bibitem{cbor}
  \textit{RFC 7049 Concise Binary Object Representation}.
  \url{http://cbor.io}

\bibitem{cddl}
  \textit{Concise data definition language (CDDL): a notational
    convention to express CBOR data structures}.
  \url{https://tools.ietf.org/html/draft-ietf-cbor-cddl-00}

\end{thebibliography}

\begin{appendices}
  \section{Current binary format}
  \label{sec:currentfmt}

  This section documents the current binary serialisation format, as of
  2018-06-03.

  The current blockchain is serialized using CBOR~\cite{cbor}. Consequently we
  present the current description as a CDDL~\cite{cddl} document. Terms in use
  below should be interpreted in that context and with reference to Appendix E
  of~\cite{cddl}, which defines a standard prelude available to such documents.

  \lstinputlisting{current-spec.cddl}

\end{appendices}
\end{document}
